-- Time-series spine: build one row per product per daily interval and
-- left-join the sales rows onto it, so intervals with no sales still appear
-- (with NULLs in the My_First_Table columns).
WITH GLOBAL_SPINE AS (
    -- 1097 consecutive one-day intervals [SPINE_START, SPINE_END), the first
    -- starting at 2020-01-01T00:00. INTERVAL_ID is referenced by the two
    -- DATEADD expressions in the same SELECT list.
    SELECT
        ROW_NUMBER() OVER (ORDER BY NULL) AS INTERVAL_ID,
        DATEADD('day', (INTERVAL_ID - 1), '2020-01-01T00:00'::timestamp_ntz) AS SPINE_START,
        DATEADD('day', INTERVAL_ID, '2020-01-01T00:00'::timestamp_ntz) AS SPINE_END
    FROM TABLE(GENERATOR(ROWCOUNT => 1097))
),
GROUPS AS (
    -- First and last sales_date observed for each product.
    -- NOTE(review): LOCAL_END is computed but not used below, so every
    -- product's spine runs to the end of GLOBAL_SPINE -- confirm this is intended.
    SELECT
        product,
        MIN(sales_date) AS LOCAL_START,
        MAX(sales_date) AS LOCAL_END
    FROM My_First_Table
    GROUP BY product
),
GROUP_SPINE AS (
    -- Per-product spine: every global interval from the one containing the
    -- product's first sale onwards.
    SELECT
        product,
        SPINE_START AS GROUP_START,
        SPINE_END AS GROUP_END
    FROM GROUPS AS G
    CROSS JOIN LATERAL (
        SELECT
            S.SPINE_START,
            S.SPINE_END
        FROM GLOBAL_SPINE AS S
        -- Compare against SPINE_END (not SPINE_START) so the interval that
        -- contains LOCAL_START is kept even when LOCAL_START is not at
        -- midnight; for midnight-aligned dates the two forms select the
        -- same intervals.
        WHERE S.SPINE_END > G.LOCAL_START
    )
)
SELECT
    G.product AS GROUP_BY_product,
    G.GROUP_START,
    G.GROUP_END,
    T.*
FROM GROUP_SPINE AS G
LEFT JOIN My_First_Table AS T
    -- Half-open interval match: a sale belongs to exactly one daily interval.
    ON T.sales_date >= G.GROUP_START
    AND T.sales_date < G.GROUP_END
    AND G.product = T.product;
-- Pivot the long (Student, Subject, Grade) rows of skool into one row per
-- Student with one average-grade column per listed subject.
WITH subject_grades AS (
    SELECT
        Student,
        Grade,
        Subject
    FROM skool
)
SELECT
    Student,
    MATHEMATICS,
    GEOGRAPHY,
    PHYS_ED
FROM subject_grades
    PIVOT (
        AVG(Grade)
        FOR Subject IN ('Mathematics', 'Geography', 'Phys Ed')
    ) AS pivoted (Student, MATHEMATICS, GEOGRAPHY, PHYS_ED);  -- rename the pivot output columns
结果:
3. One-Hot 编码(又称“虚拟变量”)
One-Hot编码是一种将分类变量转换为数字变量的方法。
下面是一个以 STATE 列为例进行 One-Hot 编码的示例。
之前:
-- One-hot encode the State column of BABYTABLE: keep every original column
-- and add one 0/1 indicator column per listed state code.
-- A row whose State is NULL or not in the list gets 0 in every indicator.
-- NOTE(review): 'DE' does not appear in this list while 'AS' and 'DC' do --
-- confirm whether that omission is intentional.
SELECT
    *,
    CASE WHEN State = 'AL' THEN 1 ELSE 0 END AS STATE_AL,
    CASE WHEN State = 'AK' THEN 1 ELSE 0 END AS STATE_AK,
    CASE WHEN State = 'AZ' THEN 1 ELSE 0 END AS STATE_AZ,
    CASE WHEN State = 'AR' THEN 1 ELSE 0 END AS STATE_AR,
    CASE WHEN State = 'AS' THEN 1 ELSE 0 END AS STATE_AS,
    CASE WHEN State = 'CA' THEN 1 ELSE 0 END AS STATE_CA,
    CASE WHEN State = 'CO' THEN 1 ELSE 0 END AS STATE_CO,
    CASE WHEN State = 'CT' THEN 1 ELSE 0 END AS STATE_CT,
    CASE WHEN State = 'DC' THEN 1 ELSE 0 END AS STATE_DC,
    CASE WHEN State = 'FL' THEN 1 ELSE 0 END AS STATE_FL,
    CASE WHEN State = 'GA' THEN 1 ELSE 0 END AS STATE_GA,
    CASE WHEN State = 'HI' THEN 1 ELSE 0 END AS STATE_HI,
    CASE WHEN State = 'ID' THEN 1 ELSE 0 END AS STATE_ID,
    CASE WHEN State = 'IL' THEN 1 ELSE 0 END AS STATE_IL,
    CASE WHEN State = 'IN' THEN 1 ELSE 0 END AS STATE_IN,
    CASE WHEN State = 'IA' THEN 1 ELSE 0 END AS STATE_IA,
    CASE WHEN State = 'KS' THEN 1 ELSE 0 END AS STATE_KS,
    CASE WHEN State = 'KY' THEN 1 ELSE 0 END AS STATE_KY,
    CASE WHEN State = 'LA' THEN 1 ELSE 0 END AS STATE_LA,
    CASE WHEN State = 'ME' THEN 1 ELSE 0 END AS STATE_ME,
    CASE WHEN State = 'MD' THEN 1 ELSE 0 END AS STATE_MD,
    CASE WHEN State = 'MA' THEN 1 ELSE 0 END AS STATE_MA,
    CASE WHEN State = 'MI' THEN 1 ELSE 0 END AS STATE_MI,
    CASE WHEN State = 'MN' THEN 1 ELSE 0 END AS STATE_MN,
    CASE WHEN State = 'MS' THEN 1 ELSE 0 END AS STATE_MS,
    CASE WHEN State = 'MO' THEN 1 ELSE 0 END AS STATE_MO,
    CASE WHEN State = 'MT' THEN 1 ELSE 0 END AS STATE_MT,
    CASE WHEN State = 'NE' THEN 1 ELSE 0 END AS STATE_NE,
    CASE WHEN State = 'NV' THEN 1 ELSE 0 END AS STATE_NV,
    CASE WHEN State = 'NH' THEN 1 ELSE 0 END AS STATE_NH,
    CASE WHEN State = 'NJ' THEN 1 ELSE 0 END AS STATE_NJ,
    CASE WHEN State = 'NM' THEN 1 ELSE 0 END AS STATE_NM,
    CASE WHEN State = 'NY' THEN 1 ELSE 0 END AS STATE_NY,
    CASE WHEN State = 'NC' THEN 1 ELSE 0 END AS STATE_NC,
    CASE WHEN State = 'ND' THEN 1 ELSE 0 END AS STATE_ND,
    CASE WHEN State = 'OH' THEN 1 ELSE 0 END AS STATE_OH,
    CASE WHEN State = 'OK' THEN 1 ELSE 0 END AS STATE_OK,
    CASE WHEN State = 'OR' THEN 1 ELSE 0 END AS STATE_OR,
    CASE WHEN State = 'PA' THEN 1 ELSE 0 END AS STATE_PA,
    CASE WHEN State = 'RI' THEN 1 ELSE 0 END AS STATE_RI,
    CASE WHEN State = 'SC' THEN 1 ELSE 0 END AS STATE_SC,
    CASE WHEN State = 'SD' THEN 1 ELSE 0 END AS STATE_SD,
    CASE WHEN State = 'TN' THEN 1 ELSE 0 END AS STATE_TN,
    CASE WHEN State = 'TX' THEN 1 ELSE 0 END AS STATE_TX,
    CASE WHEN State = 'UT' THEN 1 ELSE 0 END AS STATE_UT,
    CASE WHEN State = 'VT' THEN 1 ELSE 0 END AS STATE_VT,
    CASE WHEN State = 'VA' THEN 1 ELSE 0 END AS STATE_VA,
    CASE WHEN State = 'WA' THEN 1 ELSE 0 END AS STATE_WA,
    CASE WHEN State = 'WV' THEN 1 ELSE 0 END AS STATE_WV,
    CASE WHEN State = 'WI' THEN 1 ELSE 0 END AS STATE_WI,
    CASE WHEN State = 'WY' THEN 1 ELSE 0 END AS STATE_WY
FROM BABYTABLE;
-- Market-basket summary: for every sales order, build the alphabetically
-- ordered, comma-separated list of its product names, then count how many
-- orders share each exact product combination.
WITH order_detail AS (
    SELECT
        SALESORDERNUMBER,
        LISTAGG(ENGLISHPRODUCTNAME, ', ')
            WITHIN GROUP (ORDER BY ENGLISHPRODUCTNAME) AS ENGLISHPRODUCTNAME_listagg,
        COUNT(ENGLISHPRODUCTNAME) AS num_products  -- non-NULL product rows on the order
    FROM transactions
    GROUP BY SALESORDERNUMBER
)
SELECT
    ENGLISHPRODUCTNAME_listagg,
    COUNT(SALESORDERNUMBER) AS NumTransactions
FROM order_detail
WHERE num_products > 1  -- only orders with more than one product row
GROUP BY ENGLISHPRODUCTNAME_listagg
ORDER BY
    COUNT(SALESORDERNUMBER) DESC,
    ENGLISHPRODUCTNAME_listagg;  -- tie-breaker so the row order is deterministic
-- Rolling-window features: for each (CustomerID, TransactionDate) in
-- My_First_Table, aggregate that customer's rows whose TransactionDate falls
-- in the trailing 14-, 90- and 180-day windows (both ends inclusive, so the
-- anchor row itself is included), then attach the results to every source row.
WITH BASIC_OFFSET_14DAY AS (
    -- Self-join: A is the anchor row, B supplies the rows inside A's window.
    SELECT
        A.CustomerID,
        A.TransactionDate,
        AVG(B.PurchaseAmount) AS AVG_PURCHASEAMOUNT_PAST14DAY,
        MAX(B.PurchaseAmount) AS MAX_PURCHASEAMOUNT_PAST14DAY,
        COUNT(DISTINCT B.TransactionID) AS COUNT_DISTINCT_TRANSACTIONID_PAST14DAY
    FROM My_First_Table AS A
    INNER JOIN My_First_Table AS B
        ON A.CustomerID = B.CustomerID
    WHERE B.TransactionDate >= DATEADD(day, -14, A.TransactionDate)
        AND B.TransactionDate <= A.TransactionDate
    GROUP BY
        A.CustomerID,
        A.TransactionDate
),
BASIC_OFFSET_90DAY AS (
    -- Same aggregation over the trailing 90 days.
    SELECT
        A.CustomerID,
        A.TransactionDate,
        AVG(B.PurchaseAmount) AS AVG_PURCHASEAMOUNT_PAST90DAY,
        MAX(B.PurchaseAmount) AS MAX_PURCHASEAMOUNT_PAST90DAY,
        COUNT(DISTINCT B.TransactionID) AS COUNT_DISTINCT_TRANSACTIONID_PAST90DAY
    FROM My_First_Table AS A
    INNER JOIN My_First_Table AS B
        ON A.CustomerID = B.CustomerID
    WHERE B.TransactionDate >= DATEADD(day, -90, A.TransactionDate)
        AND B.TransactionDate <= A.TransactionDate
    GROUP BY
        A.CustomerID,
        A.TransactionDate
),
BASIC_OFFSET_180DAY AS (
    -- Same aggregation over the trailing 180 days.
    SELECT
        A.CustomerID,
        A.TransactionDate,
        AVG(B.PurchaseAmount) AS AVG_PURCHASEAMOUNT_PAST180DAY,
        MAX(B.PurchaseAmount) AS MAX_PURCHASEAMOUNT_PAST180DAY,
        COUNT(DISTINCT B.TransactionID) AS COUNT_DISTINCT_TRANSACTIONID_PAST180DAY
    FROM My_First_Table AS A
    INNER JOIN My_First_Table AS B
        ON A.CustomerID = B.CustomerID
    WHERE B.TransactionDate >= DATEADD(day, -180, A.TransactionDate)
        AND B.TransactionDate <= A.TransactionDate
    GROUP BY
        A.CustomerID,
        A.TransactionDate
)
SELECT
    src.*,
    BASIC_OFFSET_14DAY.AVG_PURCHASEAMOUNT_PAST14DAY,
    BASIC_OFFSET_14DAY.MAX_PURCHASEAMOUNT_PAST14DAY,
    BASIC_OFFSET_14DAY.COUNT_DISTINCT_TRANSACTIONID_PAST14DAY,
    BASIC_OFFSET_90DAY.AVG_PURCHASEAMOUNT_PAST90DAY,
    BASIC_OFFSET_90DAY.MAX_PURCHASEAMOUNT_PAST90DAY,
    BASIC_OFFSET_90DAY.COUNT_DISTINCT_TRANSACTIONID_PAST90DAY,
    BASIC_OFFSET_180DAY.AVG_PURCHASEAMOUNT_PAST180DAY,
    BASIC_OFFSET_180DAY.MAX_PURCHASEAMOUNT_PAST180DAY,
    BASIC_OFFSET_180DAY.COUNT_DISTINCT_TRANSACTIONID_PAST180DAY
FROM My_First_Table AS src
-- Each window CTE has one row per (CustomerID, TransactionDate), so these
-- joins do not multiply source rows.
LEFT JOIN BASIC_OFFSET_14DAY
    ON BASIC_OFFSET_14DAY.TransactionDate = src.TransactionDate
    AND BASIC_OFFSET_14DAY.CustomerID = src.CustomerID
LEFT JOIN BASIC_OFFSET_90DAY
    ON BASIC_OFFSET_90DAY.TransactionDate = src.TransactionDate
    AND BASIC_OFFSET_90DAY.CustomerID = src.CustomerID
LEFT JOIN BASIC_OFFSET_180DAY
    ON BASIC_OFFSET_180DAY.TransactionDate = src.TransactionDate
    AND BASIC_OFFSET_180DAY.CustomerID = src.CustomerID;